Load needed libraries
library(tidyverse)
library(phyloseq)
library(vegan)
library(ggplot2)
library(ggtree)
library(gplots)
library(nlme)
library(emmeans)
library(ggpubr)
library(agricolae)
library(broom)
library(xtable)
inc.raw.physeq <- readRDS("data/RDS/incubation_physeq_Aug18.RDS")
inc.physeq <- subset_samples(inc.raw.physeq, day %in% c("7",
"14",
"21",
"35",
"49",
"97"))
#Rename treatments to more informative titles
data <- data.frame(sample_data(inc.physeq)) %>%
mutate(treatment = recode(treatment,
'Control' = 'Reference',
'CompAlfa' = 'Mix')) %>%
mutate(C_N = C_flash / N_flash, Inorganic_N = NH3 + NO3) %>%
mutate(TreatmentAndDay = paste(treatment, day))
data$treatment <- relevel(data$treatment, ref = "Reference")
data$day <- as.factor(data$day)
rownames(data) <- data$i_id
sample_data(inc.physeq) <- data
sample_data(inc.physeq)$day <- as.factor(sample_data(inc.physeq)$day)
inc.model.data <- lme(Inorganic_N~treatment * day, random=~1|replication
, data = data
, weights = varIdent(form= ~1|day*treatment)
, control = lmeControl(opt = "optim", msVerbose = TRUE))
## initial value 859.150640
## iter 10 value 678.308535
## iter 20 value 671.048851
## final value 671.045517
## converged
em <- emmeans(inc.model.data, c("day", "treatment"), data = data)
sum_em <- summary(em)
theme_set(theme_bw())
p <- ggplot(data = data, aes(x = day, y = Inorganic_N )) +
geom_point(aes(colour = treatment), size = 4) +
stat_summary(aes(group = treatment), fun.y = mean, geom = "line", size = 2, colour = "steelblue") +
geom_pointrange(size = 1, pch = 1, data = sum_em, aes(x = day, y = emmean, ymin = lower.CL, ymax = upper.CL, group = treatment)) +
xlab("Day") +
ylab("Inorganic Nitrogen") +
facet_wrap(~treatment) +
theme(
axis.title.y=element_text(colour = "black", size = 17, hjust = 0.5, margin=margin(0,12,0,0)),
axis.title.x=element_text(colour = "black", size = 17),
axis.text.x=element_text(colour = "black", size=15),
axis.text.y=element_text(colour = "black", size=15),
legend.position="none",
legend.text=element_text(size=12.5),
legend.key=element_blank(),
plot.title = element_text(face = "bold"),
strip.text.x=element_text(size=15)
)
tiff("Figures/inorganic_N_plot.tif",height=5,width=5,units='in',res=300)
p
dev.off()
## quartz_off_screen
## 2
p
em2 <- emmeans(inc.model.data, c("treatment", "day"), data = data)
lambdas <- list(
"Alfalfa - Reference" = c(-1, 1, rep(0, 24 - 2))
, "Mix - Reference" = c(-1, 0, 1, rep(0, 24 - 3))
, "Compost - Reference" = c(-1, 0, 0, 1, rep(0, 24 - 4))
, "Alfalfa - Reference" = c(rep(0, 4), -1, 1, rep(0, 24 - 6))
, "Mix - Reference" = c(rep(0, 4), -1, 0, 1, rep(0, 24 - 7))
, "Compost - Reference" = c(rep(0, 4), -1, 0, 0, 1, rep(0, 24 - 8))
, "Alfalfa - Reference" = c(rep(0, 8), -1, 1, rep(0, 24 - 10))
, "Mix - Reference" = c(rep(0, 8), -1, 0, 1, rep(0, 24 - 11))
, "Compost - Reference" = c(rep(0, 8), -1, 0, 0, 1, rep(0, 24 - 12))
, "Alfalfa - Reference" = c(rep(0, 12), -1, 1, rep(0, 24 - 14))
, "Mix - Reference" = c(rep(0, 12), -1, 0, 1, rep(0, 24 - 15))
, "Compost - Reference" = c(rep(0, 12), -1, 0, 0, 1, rep(0, 24 - 16))
, "Alfalfa - Reference" = c(rep(0, 16), -1, 1, rep(0, 24 - 18))
, "Mix - Reference" = c(rep(0, 16), -1, 0, 1, rep(0, 24 - 19))
, "Compost - Reference" = c(rep(0, 16), -1, 0, 0, 1, rep(0, 24 - 20))
, "Alfalfa - Reference" = c(rep(0, 20), -1, 1, rep(0, 24 - 22))
, "Mix - Reference" = c(rep(0, 20), -1, 0, 1, rep(0, 24 - 23))
, "Compost - Reference" = c(rep(0, 20), -1, 0, 0, 1)
)
sum_em2 <- summary(contrast(em2, lambdas), infer = c(TRUE, TRUE), adjust = "mvt")
sum_em2$day <- factor(rep(c(7, 14, 21, 35, 49, 97), each = 3))
theme_set(theme_bw())
p <- ggplot(data = sum_em2, aes(x = day, y = estimate)) +
geom_pointrange(aes(ymin = lower.CL, ymax = upper.CL, group = contrast, colour = contrast), size = 0.7) +
geom_hline(aes(yintercept = 0), linetype = 2) +
xlab("Day") +
ylab(paste('Inorganic nitrogen \n difference from reference')) +
scale_y_continuous(breaks = seq(-10, 800, 25)) +
facet_wrap(~contrast) +
theme(
axis.title.y=element_text(colour = "black", size = 17, hjust = 0.5, margin=margin(0,12,0,0)),
axis.title.x=element_text(colour = "black", size = 17),
axis.text.x=element_text(colour = "black", size=15),
axis.text.y=element_text(colour = "black", size=15),
legend.position="none",
legend.text=element_text(size=12.5),
legend.key=element_blank(),
plot.title = element_text(face = "bold"),
strip.text.x=element_text(size=15)
)
tiff("Figures/inorganic_N_plot_diff.tif",height=5,width=8,units='in',res=300)
p
dev.off()
## quartz_off_screen
## 2
p
#### Day 7
data <- data %>%
filter(day == 7)
g <-ggboxplot(data = data,x = "treatment"
, y = "Inorganic_N", color = "treatment"
, legend = "none") +
ylab("Inorganic Nitrogen") +
xlab("Treatment") +
ggtitle("Day 7") +
rotate_x_text(angle = 45) +
ylim(0, max(data$Inorganic_N) + 5) +
stat_compare_means(aes(label = ..p.signif..), method = "t.test", ref.group = "Reference", paired = TRUE) +
stat_compare_means(method = "anova", label.y = max(data$Inorganic_N))
g
# linear model
lm <- lm(Inorganic_N ~ treatment, data = data)
lm.summary <- summary(lm)
lm.summary
##
## Call:
## lm(formula = Inorganic_N ~ treatment, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0214 -0.3378 0.0121 0.3209 4.7046
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.5924 0.3019 15.211 < 2e-16 ***
## treatmentAlfalfa 0.9544 0.4270 2.235 0.030526 *
## treatmentMix -3.1199 0.4270 -7.307 4.07e-09 ***
## treatmentCompost -1.8226 0.4270 -4.269 0.000103 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.046 on 44 degrees of freedom
## Multiple R-squared: 0.7135, Adjusted R-squared: 0.694
## F-statistic: 36.53 on 3 and 44 DF, p-value: 5.209e-12
# ANOVA
av <- aov(lm)
# Results
av.summary <- summary(av)
av.summary
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 119.89 39.96 36.53 5.21e-12 ***
## Residuals 44 48.13 1.09
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# T.test x2
tukey <- TukeyHSD(av, conf.level = 0.95)
# more info from HSD.test from agricole package
tukey.1 <- HSD.test(av, trt = "treatment")
tukey.1
## $statistics
## MSerror Df Mean CV MSD
## 1.093845 44 3.595319 29.08978 1.140025
##
## $parameters
## test name.t ntr StudentizedRange alpha
## Tukey treatment 4 3.775958 0.05
##
## $means
## Inorganic_N std r Min Max Q25 Q50
## Alfalfa 5.546750 1.9532372 12 2.525333 10.251333 4.294833 5.681833
## Compost 2.769750 0.4011056 12 2.080333 3.471333 2.517333 2.789833
## Mix 1.472417 0.3204056 12 1.010333 2.116333 1.315833 1.411333
## Reference 4.592361 0.5447008 12 3.832333 5.535333 4.140083 4.730667
## Q75
## Alfalfa 6.332333
## Compost 3.034083
## Mix 1.494583
## Reference 4.892583
##
## $comparison
## NULL
##
## $groups
## Inorganic_N groups
## Alfalfa 5.546750 a
## Reference 4.592361 a
## Compost 2.769750 b
## Mix 1.472417 c
##
## attr(,"class")
## [1] "group"
# latex tables
tukey.table.letters <- xtable(tukey.1$groups)
anova.table <- xtable(summary(av))
tukey.table <- xtable(tidy(tukey))
plot(lm)
# This object has day 0 and amendement samples removed and is rarefied to 6000
physeq <- readRDS("data/IncPhyseqRareClusteredTree")
plot_richness(physeq)
shannon <- plot_richness(physeq, measures = "Shannon")
shannon.df <- shannon$data
# load summary function
source("Functions/summarySE.R")
summary.shannon.df <- summarySE(shannon.df, measurevar = "value", groupvars = c("treatment", "day"))
pd <- position_dodge(0.2) # move them .05 to the left and right
ggplot(summary.shannon.df, aes(x=day, y=value, colour=treatment)) +
geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.1, position=pd) +
geom_point(position=pd) +
ggtitle("Shannon diversity")
Now let’s plot an PCA and NMDS ordination of the weighted unifrac distances, which uses the phylogenetic tree and considers the phylogenetic relationship between OTUs when calculating the distance matrix
PCoA <- ordinate(physeq, "PCoA", "wunifrac")
plot_ordination(physeq, PCoA, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))
plot_ordination(physeq, PCoA, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))
NMDS <- ordinate(physeq, "NMDS", "wunifrac")
## Run 0 stress 0.1167217
## Run 1 stress 0.1249565
## Run 2 stress 0.1249576
## Run 3 stress 0.1249551
## Run 4 stress 0.1249564
## Run 5 stress 0.1167217
## ... Procrustes: rmse 1.048732e-05 max resid 0.0001520216
## ... Similar to previous best
## Run 6 stress 0.1167203
## ... New best solution
## ... Procrustes: rmse 0.0001684605 max resid 0.002237804
## ... Similar to previous best
## Run 7 stress 0.1249608
## Run 8 stress 0.1167202
## ... New best solution
## ... Procrustes: rmse 0.0001419662 max resid 0.002166051
## ... Similar to previous best
## Run 9 stress 0.1167218
## ... Procrustes: rmse 0.0001150959 max resid 0.00172459
## ... Similar to previous best
## Run 10 stress 0.1249589
## Run 11 stress 0.1167216
## ... Procrustes: rmse 0.0001728815 max resid 0.002268143
## ... Similar to previous best
## Run 12 stress 0.1249551
## Run 13 stress 0.1167217
## ... Procrustes: rmse 0.0001661858 max resid 0.002256404
## ... Similar to previous best
## Run 14 stress 0.1167227
## ... Procrustes: rmse 0.0002688514 max resid 0.003219877
## ... Similar to previous best
## Run 15 stress 0.1249594
## Run 16 stress 0.12496
## Run 17 stress 0.4175545
## Run 18 stress 0.1249623
## Run 19 stress 0.1167217
## ... Procrustes: rmse 0.0001990821 max resid 0.002287902
## ... Similar to previous best
## Run 20 stress 0.1167207
## ... Procrustes: rmse 6.55357e-05 max resid 0.0009958961
## ... Similar to previous best
## *** Solution reached
plot_ordination(physeq, NMDS, color = "day") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = day))
plot_ordination(physeq, NMDS, color = "treatment") + stat_ellipse(geom = "polygon", type = "norm", alpha = 0.2, aes(fill = treatment))
Table with # of aliens for each treatment and what period of the incubation they were detected, early/late or throughout.
The lists of alien OTUs detected in amended microcosms was generated with , we can load those lists along with the phyloseq object with clustering information from :
alf.aliens <- readRDS("data/alf.aliens.rds")
comp.aliens <- readRDS("data/comp.aliens.rds")
mix.aliens <- readRDS("data/mix.aliens.rds")
# Read in the phylsoeq object from the clustering script
alf <- prune_samples(sample_data(physeq)$treatment %in% c("Alfalfa"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.alf <- prune_samples(sample_data(alf)$response.group %in% c("early"), alf) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.alf <- prune_samples(sample_data(alf)$response.group %in% c("late"), alf) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.alf <- prune_taxa(as.character(alf.aliens), early.alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.alf <- prune_taxa(as.character(alf.aliens), late.alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.alf <- prune_taxa(as.character(alf.aliens), alf) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of alfalfa alien OTUs detected in all alfalfa")
# number.aliens.total.alf
# print("Total number of OTUs detected in all alfalfa")
# nrow(otu_table(alf))
# print("Number of alfalfa alien OTUs detected in early alfalfa")
# number.aliens.early.alf
# print("Total number of OTUs detected in early alfalfa")
# nrow(otu_table(early.alf))
# print("Number of alfalfa alien OTUs detected in late alfalfa")
# number.aliens.late.alf
# print("Total number of OTUs detected in late alfalfa")
# nrow(otu_table(late.alf))
comp <- prune_samples(sample_data(physeq)$treatment %in% c("Compost"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.comp <- prune_samples(sample_data(comp)$response.group %in% c("early"), comp) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.comp <- prune_samples(sample_data(comp)$response.group %in% c("late"), comp) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.comp <- prune_taxa(as.character(comp.aliens), early.comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.comp <- prune_taxa(as.character(comp.aliens), late.comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.comp <- prune_taxa(as.character(comp.aliens), comp) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of Compost alien OTUs detected in all Compost")
# number.aliens.total.comp
# print("Total number of OTUs detected in all Compost")
# nrow(otu_table(comp))
# print("Number of Compost alien OTUs detected in early Compost")
# number.aliens.early.comp
# print("Total number of OTUs detected in early Compost")
# nrow(otu_table(early.comp))
# print("Number of Compost alien OTUs detected in late Compost")
# number.aliens.late.comp
# print("Total number of OTUs detected in late Compost")
# nrow(otu_table(late.comp))
mix <- prune_samples(sample_data(physeq)$treatment %in% c("Mix"), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T)
early.mix <- prune_samples(sample_data(mix)$response.group %in% c("early"), mix) %>%
filter_taxa(function(x) sum(x) > 1, T)
late.mix <- prune_samples(sample_data(mix)$response.group %in% c("late"), mix) %>%
filter_taxa(function(x) sum(x) > 1, T)
number.aliens.early.mix <- prune_taxa(as.character(mix.aliens), early.mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.late.mix <- prune_taxa(as.character(mix.aliens), late.mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
number.aliens.total.mix <- prune_taxa(as.character(mix.aliens), mix) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
otu_table() %>%
nrow()
# print("Number of Mix alien OTUs detected in all Mix")
# number.aliens.total.mix
# print("Total number of OTUs detected in all Mix")
# nrow(otu_table(mix))
# print("Number of Mix alien OTUs detected in early Mix")
# number.aliens.early.mix
# print("Total number of OTUs detected in early Mix")
# nrow(otu_table(early.mix))
# print("Number of Mix alien OTUs detected in late Mix")
# number.aliens.late.mix
# print("Total number of OTUs detected in late Mix")
# nrow(otu_table(late.mix))
library(knitr)
a <- c("Alfalfa", number.aliens.total.alf, number.aliens.early.alf, number.aliens.late.alf)
b <- c("Compost", number.aliens.total.comp, number.aliens.early.comp, number.aliens.late.comp)
c <- c("Mix", number.aliens.total.mix, number.aliens.early.mix, number.aliens.late.mix)
x <- as.data.frame(rbind(a,b,c))
colnames(x) <- c("Treatment", "Throughout", "Eary", "Late")
rownames(x) <- NULL
kable(x, caption = "Number of OTUs considered aliens detected in each treatment")
| Treatment | Throughout | Eary | Late |
|---|---|---|---|
| Alfalfa | 26 | 25 | 4 |
| Compost | 74 | 60 | 28 |
| Mix | 58 | 38 | 20 |
Function to get a list of OTUs from a sample type
GetOTUs <- function(physeq, samples) {
prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
filter_taxa(function(x) sum(x) > 1, T) %>%
tax_table() %>%
row.names()
}
Using function to generate list of OTUs from all starting soil, incubated soils and amendments
physeq.raw <- readRDS("data/RDS/incubation_physeq_Aug18.RDS")
# Incubated samples:
Alfalfa.otus <- GetOTUs(physeq.raw, c("Alfalfa"))
Compost.otus <- GetOTUs(physeq.raw, c("Compost"))
CompAlfa.otus <- GetOTUs(physeq.raw, c("CompAlfa"))
Control.otus <- GetOTUs(physeq.raw, c("Control"))
# Amendment samples:
AlfalfaAmend.otus <- GetOTUs(physeq.raw, c("AlfalfaAmend"))
CompostAmend.otus <- GetOTUs(physeq.raw, c("CompostAmend"))
# Soil sample:
AlfalfaSoil.otus <- GetOTUs(physeq.raw, c("AlfalfaSoil"))
GetAlienHeatMap <- function(physeq, control_otus, alien_otus, recieving_otus, samples){
otus <- list(alien_otus, control_otus)
print("Looking for aliens between amendment and control soil")
venn <- venn(otus)
alf.aliens <- attr(venn, "intersections")$A
aliens <- list(alf.aliens, recieving_otus)
print("Detecting aliens in amended soil")
aliens.venn <- venn(aliens)
aliens.detected <- attr(aliens.venn,"intersections")$`A:B`
incubated <- prune_samples(sample_data(physeq)$treatment %in% c(samples), physeq) %>%
filter_taxa(function(x) sum(x) > 5, T) %>%
transform_sample_counts(function(x) x / sum(x))
incubated.aliens <- prune_taxa(aliens.detected, incubated)
sample.order <- as.data.frame(sample_data(incubated.aliens)) %>%
arrange(day, replication) %>%
select(i_id) %>%
remove_rownames()
alien.heatmap <- plot_heatmap(incubated.aliens, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
sample.order = as.character(sample.order$i_id),
low = "#66CCFF", high = "#000033", na.value = "white")
alien.heatmap
}
alf.heatmap <- GetAlienHeatMap(physeq, Control.otus, AlfalfaAmend.otus, Alfalfa.otus, c("Alfalfa"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
alf.heatmap
comp.heatmap <- GetAlienHeatMap(physeq, Control.otus, CompostAmend.otus, Compost.otus, c("Compost"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
comp.heatmap
I didn’t extract DNA from a mixed sample of compost and alfalfa, to get the list of potential aliens, we will combine the two lists from alfalfa and compost.
mix <- c(AlfalfaAmend.otus, CompostAmend.otus)
mix.heatmap <- GetAlienHeatMap(physeq, Control.otus, mix, CompAlfa.otus, c("Mix"))
## [1] "Looking for aliens between amendment and control soil"
## [1] "Detecting aliens in amended soil"
mix.heatmap
The relative abundance of the alien OTUs through the incubation is relatively low, but different between treatments. Could low abundance OTUs be drivers in this situation? We should compare the relative abundance of the dominant OTUs from each treatment.
Let’s make another heatmap, this one will have a list of OTUs from a treatment that is composed of the top ten OTUs by relative abundance from each day.
Day_top10 <- function(physeq, trt, days){
trt <- prune_samples(sample_data(physeq)$treatment %in% c(trt), physeq)
d0 <- subset_samples(trt, day == days[1])
l0 <- names(sort(taxa_sums(d0), TRUE)[1:10])
d7 <- subset_samples(trt, day == days[2])
l7 <- names(sort(taxa_sums(d7), TRUE)[1:10])
d14 <- subset_samples(trt, day == days[3])
l14 <- names(sort(taxa_sums(d14), TRUE)[1:10])
d21 <- subset_samples(trt, day == days[4])
l21 <- names(sort(taxa_sums(d21), TRUE)[1:10])
d35 <- subset_samples(trt, day == days[5])
l35 <- names(sort(taxa_sums(d35), TRUE)[1:10])
d49 <- subset_samples(trt, day == days[6])
l49 <- names(sort(taxa_sums(d49), TRUE)[1:10])
d97 <- subset_samples(trt, day == days[7])
l97 <- names(sort(taxa_sums(d97), TRUE)[1:10])
list <- c(l0, l7, l14, l21, l35, l49, l97)
list
phy <- prune_taxa(list, trt) %>%
filter_taxa(function(x) sum(x) > 5, T) %>%
transform_sample_counts(function(x) x / sum(x))
sample.order <- as.data.frame(sample_data(phy)) %>%
arrange(day, replication) %>%
select(i_id) %>%
remove_rownames()
heatmap <- plot_heatmap(phy, sample.label = "day", taxa.order= "Phylum", taxa.label = "Genus",
sample.order = as.character(sample.order$i_id),
low = "#66CCFF", high = "#000033", na.value = "white")
heatmap
}
Not rarefied for these heatmaps:
days <- c("0", "7", "14", "21", "35", "49", "97")
alf <- Day_top10(physeq.raw, c("Alfalfa"), days)
alf
Compost:
comp <- Day_top10(physeq.raw, c("Compost"), days)
comp
Mix:
mix <- Day_top10(physeq.raw, c("CompAlfa"), days)
mix
What are the common top OTUs from the three treatments? By treatment, the total number of OTUs made up of each treatment + day’s top 10 was different. levels(comp$data$OTU) will show you the unique OTUs.
venn(list("Alfalfa" = levels(alf$data$OTU), "Compost" = levels(comp$data$OTU), "Mix" = levels(mix$data$OTU)))